#Setup
#install.packages("tidyverse")
#install.packages("PerformanceAnalytics")
#install.packages("ggfortify")
#install.packages("fastDummies")
library(tidyverse) # core package includes following packages: tidyr, dplyr, ggplot2, readr, purrr, tibble, stringr, forcats
Warnung: vorhergehender Import ‘lifecycle::last_warnings’ durch ‘rlang::last_warnings’ während des Ladens von ‘pillar’ ersetztWarnung: vorhergehender Import ‘lifecycle::last_warnings’ durch ‘rlang::last_warnings’ während des Ladens von ‘tibble’ ersetztRegistered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
Warnung: vorhergehender Import ‘lifecycle::last_warnings’ durch ‘rlang::last_warnings’ während des Ladens von ‘hms’ ersetzt-- Attaching packages ----------------------------------------------------------------------- tidyverse 1.3.1 --
v ggplot2 3.3.5 v purrr 0.3.4
v tibble 3.1.4 v dplyr 1.0.7
v tidyr 1.1.3 v stringr 1.4.0
v readr 2.0.1 v forcats 0.5.1
-- Conflicts -------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(plotly)
Warnung: Paket ‘plotly’ wurde unter R Version 4.1.3 erstelltRegistered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attache Paket: ‘plotly’
Das folgende Objekt ist maskiert ‘package:ggplot2’:
last_plot
Das folgende Objekt ist maskiert ‘package:stats’:
filter
Das folgende Objekt ist maskiert ‘package:graphics’:
layout
library("PerformanceAnalytics") #for correlation
Warnung: Paket ‘PerformanceAnalytics’ wurde unter R Version 4.1.3 erstelltLade nötiges Paket: xts
Warnung: Paket ‘xts’ wurde unter R Version 4.1.3 erstelltLade nötiges Paket: zoo
Warnung: Paket ‘zoo’ wurde unter R Version 4.1.3 erstellt
Attache Paket: ‘zoo’
Die folgenden Objekte sind maskiert von ‘package:base’:
as.Date, as.Date.numeric
Attache Paket: ‘xts’
Die folgenden Objekte sind maskiert von ‘package:dplyr’:
first, last
Attache Paket: ‘PerformanceAnalytics’
Das folgende Objekt ist maskiert ‘package:graphics’:
legend
library(broom) # for model quantification
library(ggfortify) # for visualizing model fits
Warnung: Paket ‘ggfortify’ wurde unter R Version 4.1.3 erstellt
library(fastDummies)
Warnung: Paket ‘fastDummies’ wurde unter R Version 4.1.3 erstellt
library(dplyr)
library(ggplot2)
# Load the Seoul bike-sharing data, fixing the types of the key columns:
#   Date            -> Date, stored as day/month/year in the file
#   Seasons         -> ordered factor (Winter < Spring < Summer < Autumn)
#   Holiday         -> factor, levels taken from the data
#   Functioning Day -> factor, levels taken from the data
# Columns not listed here keep readr's default type guessing.
bike_data <- read_csv(
  file = "SeoulBikeData.csv",
  col_types = cols(
    Date = col_date(format = "%d/%m/%Y"),
    Seasons = col_factor(
      levels = c("Winter", "Spring", "Summer", "Autumn"),
      ordered = TRUE
    ),
    Holiday = col_factor(),
    `Functioning Day` = col_factor()
  ),
  show_col_types = FALSE
)
# Derive calendar and time-of-day features, then move the key columns to the
# front of the table.
#
# `day` and `month` are built from the numeric fields of the date instead of
# weekdays()/months(). Those functions return names in the session locale, so
# matching their output against the German level names only works on a German
# system; elsewhere every value would become NA. The German labels are kept,
# so the result is unchanged where the previous code worked.
bike_data <- bike_data %>%
  mutate(
    day = factor(
      # POSIXlt$wday is 0 = Sunday ... 6 = Saturday; shift to 1 = Monday ... 7 = Sunday
      (as.POSIXlt(Date)$wday + 6) %% 7 + 1,
      levels = 1:7,
      labels = c(
        "Montag", "Dienstag", "Mittwoch", "Donnerstag",
        "Freitag", "Samstag", "Sonntag"
      )
    ),
    month = factor(
      # POSIXlt$mon is 0 = January ... 11 = December
      as.POSIXlt(Date)$mon + 1,
      levels = 1:12,
      labels = c(
        "Januar", "Februar", "März", "April", "Mai", "Juni",
        "Juli", "August", "September", "Oktober", "November", "Dezember"
      )
    ),
    # Bucket the hour of day into five ordered periods.
    day_time = factor(
      case_when(
        Hour >= 5 & Hour < 11 ~ "Morning",
        Hour >= 11 & Hour < 15 ~ "Noon",
        Hour >= 15 & Hour < 18 ~ "Afternoon",
        Hour >= 18 & Hour < 22 ~ "Evening",
        Hour < 5 | Hour >= 22 ~ "Night"
      ),
      levels = c("Morning", "Noon", "Afternoon", "Evening", "Night"),
      ordered = TRUE
    )
  ) %>%
  select(
    Date, month, day, Hour, day_time, Holiday, `Rented Bike Count`,
    everything()
  )
#LO1: Performance
# Bar chart of the total number of rented bikes per season.
# A histogram over `Seasons` alone only counts how many rows fall into each
# season. Supplying `Rented Bike Count` as y with histfunc = "sum" adds up the
# rentals per season, which is what the title describes.
grafik_1 <- bike_data %>%
  plot_ly(x = ~Seasons, y = ~`Rented Bike Count`) %>%
  add_histogram(histfunc = "sum", color = I("darkgreen"), opacity = 0.9) %>%
  layout(title = "Total bike count by seasons")
# Wall-clock timing of displaying grafik_1.
# NOTE(review): the bare `grafik_1` line only renders when auto-printing is
# active (console / notebook chunk) - confirm how this script is run.
start <- Sys.time()
grafik_1
end <- Sys.time()
print(difftime(end, start))
#> Time difference of 0.473356 secs
# Scatter plot of rentals against temperature, coloured by season, with a
# linear trend line per season drawn in red.
# - No fill scale is set: nothing in this plot maps to `fill`, so a fill scale
#   would have no visible effect.
# - The smoothing formula is given explicitly so ggplot2 does not print its
#   "using formula 'y ~ x'" message on every render.
grafik_4 <- ggplot(
  bike_data,
  aes(x = Temperature, y = `Rented Bike Count`, color = Seasons)
) +
  geom_jitter(alpha = 0.3) +
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
  labs(
    x = "Temperature in Celsius",
    y = "Rented Bikes",
    title = "Correlation between temperature and rented bikes"
  ) +
  theme_minimal()
# Wall-clock timing of displaying grafik_4.
# NOTE(review): the bare `grafik_4` line only renders when auto-printing is
# active (console / notebook chunk) - confirm how this script is run.
start <- Sys.time()
grafik_4
end <- Sys.time()
print(difftime(end, start))
#> Time difference of 1.329525 secs
#LO2: Dashboard design principles
# Bar chart of the total number of rented bikes per season (navy variant).
# A histogram over `Seasons` alone only counts how many rows fall into each
# season. Supplying `Rented Bike Count` as y with histfunc = "sum" adds up the
# rentals per season, which is what the title describes.
bike_data %>%
  plot_ly(x = ~Seasons, y = ~`Rented Bike Count`) %>%
  add_histogram(histfunc = "sum", color = I("navy"), opacity = 0.9) %>%
  layout(title = "Total bike count by seasons")
##Grafik 4:
# Scatter plot of rentals against temperature, coloured by season, with a
# linear trend line per season drawn in red; shown as an interactive plotly
# widget.
# - No fill scale is set: nothing in this plot maps to `fill`, so a fill scale
#   would have no visible effect.
# - The smoothing formula is given explicitly so ggplot2 does not print its
#   "using formula 'y ~ x'" message on every render.
grafik_4 <- ggplot(
  bike_data,
  aes(x = Temperature, y = `Rented Bike Count`, color = Seasons)
) +
  geom_jitter(alpha = 0.3) +
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
  labs(
    x = "Temperature in Celsius",
    y = "Rented Bikes",
    title = "Correlation between temperature and rented bikes"
  ) +
  theme_minimal()
ggplotly(grafik_4)
#> `geom_smooth()` using formula 'y ~ x'
##Grafik 5
# Rentals against snowfall, one panel per month; points are star-shaped and
# shaded from dark to light blue by temperature. Rendered interactively.
scatter <- ggplot(
  data = bike_data,
  mapping = aes(x = Snowfall, y = `Rented Bike Count`)
) +
  geom_jitter(mapping = aes(color = Temperature), shape = 8) +
  scale_color_gradient(low = "dark blue", high = "light blue") +
  facet_wrap(~month) +
  labs(title = "Bike rental and the impact of snow") +
  theme_minimal()
ggplotly(scatter)
# Line-and-marker plot of rented bikes against hour of day.
# The data frame is bound once in plot_ly() so both axes can be given as
# formulas over its columns. Previously the trace was handed an undefined
# object (`bike`), and `~'Hour'` was a formula over the literal string "Hour"
# rather than the Hour column.
# NOTE(review): this reuses the name `grafik_4`, replacing the ggplot object of
# the same name; the title and trace name still look like placeholders.
grafik_4 <- bike_data %>%
  plot_ly() %>%
  add_trace(
    x = ~Hour,
    y = ~`Rented Bike Count`,
    type = "scatter",
    mode = "lines+markers",
    name = "Name of Trace 1"
  ) %>%
  layout(title = "Plot Title")
grafik_4
#LO3: #LO4: Evaluation